{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "9a7464ea-580b-4bb0-8a9f-8982315df1b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from haystack.document_stores import InMemoryDocumentStore\n",
    "from haystack.nodes import BM25Retriever, FARMReader\n",
    "from haystack.pipelines import ExtractiveQAPipeline\n",
    "from haystack.pipelines.standard_pipelines import TextIndexingPipeline\n",
    "from haystack.utils import fetch_archive_from_http, print_answers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "cea1a793-cd1d-4c66-bc53-108cf0d4be23",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "183\n"
     ]
    }
   ],
   "source": [
    "doc_dir = \"data/got_dataset\"\n",
    "fetch_archive_from_http(\n",
    "            url=\"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt1.zip\",\n",
    "            output_dir=doc_dir,\n",
    "        )\n",
    "files_to_index = [doc_dir + \"/\" + f for f in os.listdir(doc_dir)]\n",
    "print(len(files_to_index))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ef385cdc-13e4-402a-a39e-8a6405090733",
   "metadata": {},
   "outputs": [],
   "source": [
    "document_store = InMemoryDocumentStore(use_bm25=True)\n",
    "indexing_pipeline = TextIndexingPipeline(document_store)\n",
    "indexing_pipeline.run_batch(file_paths=files_to_index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "0b7a232c-73be-4c70-8ed4-9e2d39221803",
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever = BM25Retriever(document_store=document_store)\n",
    "reader = FARMReader(model_name_or_path=\"deepset/roberta-base-squad2\", use_gpu=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7e2e48a2-424f-4221-bdcc-12d2dd32429c",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Inferencing Samples: 100%|██████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.04 Batches/s]\n"
     ]
    }
   ],
   "source": [
    "pipe = ExtractiveQAPipeline(reader, retriever)\n",
    "prediction = pipe.run(\n",
    "            query=\"Who is the father of Arya Stark?\", params={\"Retriever\": {\"top_k\": 10}, \"Reader\": {\"top_k\": 5}}\n",
    "        )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "191957f7-3605-4a92-9d46-66e1ff2a9901",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "'Query: Who is the father of Arya Stark?'\n",
      "'Answers:'\n",
      "[   <Answer {'answer': 'Eddard', 'type': 'extractive', 'score': 0.993372917175293, 'context': \"s Nymeria after a legendary warrior queen. She travels with her father, Eddard, to King's Landing when he is made Hand of the King. Before she leaves,\", 'offsets_in_document': [{'start': 207, 'end': 213}], 'offsets_in_context': [{'start': 72, 'end': 78}], 'document_ids': ['9e3c863097d66aeed9992e0b6bf1f2f4'], 'meta': {'_split_id': 3}}>,\n",
      "    <Answer {'answer': 'Ned', 'type': 'extractive', 'score': 0.9753614664077759, 'context': \"k in the television series.\\n\\n====Season 1====\\nArya accompanies her father Ned and her sister Sansa to King's Landing. Before their departure, Arya's h\", 'offsets_in_document': [{'start': 630, 'end': 633}], 'offsets_in_context': [{'start': 74, 'end': 77}], 'document_ids': ['7d3360fa29130e69ea6b2ba5c5a8f9c8'], 'meta': {'_split_id': 10}}>,\n",
      "    <Answer {'answer': 'Lord Eddard Stark', 'type': 'extractive', 'score': 0.9177324771881104, 'context': 'rk daughters.\\n\\nDuring the Tourney of the Hand to honour her father Lord Eddard Stark, Sansa Stark is enchanted by the knights performing in the event.', 'offsets_in_document': [{'start': 280, 'end': 297}], 'offsets_in_context': [{'start': 67, 'end': 84}], 'document_ids': ['5dbccad397381605eba063f71dd500a6'], 'meta': {'_split_id': 3}}>,\n",
      "    <Answer {'answer': 'Ned', 'type': 'extractive', 'score': 0.8396495580673218, 'context': \" girl disguised as a boy all along and is surprised to learn she is Arya, Ned Stark's daughter. After the Goldcloaks get help from Ser Amory Lorch and\", 'offsets_in_document': [{'start': 848, 'end': 851}], 'offsets_in_context': [{'start': 74, 'end': 77}], 'document_ids': ['257088f56d2faba55e2ef2ebd19502dc'], 'meta': {'_split_id': 31}}>,\n",
      "    <Answer {'answer': 'King Robert', 'type': 'extractive', 'score': 0.6922307014465332, 'context': \"en refuses to yield Gendry, who is actually a bastard son of the late King Robert, to the Lannisters.  The Night's Watch convoy is overrun and massacr\", 'offsets_in_document': [{'start': 579, 'end': 590}], 'offsets_in_context': [{'start': 70, 'end': 81}], 'document_ids': ['4d51b1876e8a7eac8132b97e2af04401'], 'meta': {'_split_id': 4}}>]\n"
     ]
    }
   ],
   "source": [
    "print_answers(prediction, details=\"all\")  ## Choose from `minimum`, `medium`, and `all`"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "chapter09",
   "language": "python",
   "name": "chapter09"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
